In [38]:
import pandas as pd
In [39]:
# Load the scraped dataset into a DataFrame.
# NOTE(review): a later cell writes `df` back to this same path with df.to_csv,
# so re-running the notebook reads its own previous output.
df = pd.read_csv('scraped_and_cleand_six.csv')
In [40]:
# Inspect column dtypes and non-null counts right after loading.
df.info()
In [11]:
# Preview the result of running `comma` over 'Share Total' without storing it.
# NOTE(review): `comma` is only defined further down in this notebook, so this
# cell raises NameError on a fresh kernel run from top to bottom.
df['Share Total'].apply(comma)
Out[11]:
In [12]:
# Store the `comma`-converted values back into 'Share Total'.
# NOTE(review): depends on `comma`, which is defined in a later cell.
df['Share Total'] = df['Share Total'].apply(comma)
In [13]:
# Re-check dtypes after converting 'Share Total'.
df.info()
In [14]:
# Force 'Share Total' to an integer dtype; this raises if any value could not
# be converted (e.g. a string that `comma` passed through, or a missing value).
df['Share Total'] = df['Share Total'].astype(int)
In [15]:
# Confirm that 'Share Total' now has an integer dtype.
df.info()
In [16]:
# Show the 20 largest 'Share Total' values, biggest first.
df['Share Total'].sort_values(ascending=False).head(20)
Out[16]:
In [17]:
# Show the full row(s) whose 'Share Total' equals 27205398.
# NOTE(review): the literal was presumably read off the listing above - confirm.
df[df['Share Total']==27205398]
Out[17]:
In [18]:
# First five rows when ordered by 'Company' ascending.
df.sort_values(by='Company', ascending=True).head()
Out[18]:
In [19]:
# Preview the result of running `comma` over 'Price' without storing it.
# NOTE(review): `comma` is defined in the next cell; the execution order of
# this notebook does not match its top-to-bottom order.
df['Price'].apply(comma)
In [20]:
def comma(elem):
    """Convert a formatted number string to ``int``, truncating decimals.

    Every apostrophe is removed (presumably a thousands separator, as in
    ``"1'234.56"``) and everything from the first ``.`` onward is discarded,
    so the value is truncated, not rounded.

    Parameters
    ----------
    elem : object
        Usually a ``str``. Any other value is passed through untouched.

    Returns
    -------
    int or object
        The parsed integer, or ``elem`` itself (unmodified) when it is not a
        string (e.g. NaN or an already-converted number) or when its leading
        part is not a valid integer literal.
    """
    try:
        # Parse into a fresh value instead of rebinding `elem`, so a failed
        # parse hands back the original input rather than a stripped and
        # truncated fragment of it.
        return int(elem.replace("'", '').split('.')[0])
    except (AttributeError, TypeError, ValueError):
        # AttributeError: no .replace (non-string input such as float/None).
        # TypeError: .replace exists but rejects str arguments (e.g. bytes).
        # ValueError: the leading part is not an integer literal.
        # Anything else (KeyboardInterrupt, SystemExit, ...) must propagate.
        return elem
In [21]:
# Scratch value used to try out the split-on-'.' truncation by hand.
test = "26767.89"
In [22]:
# Keep only the part before the first '.' and parse it as an int.
int(test.split(".")[0])
Out[22]:
In [23]:
# Store the `comma`-converted values back into 'Price'.
df['Price'] = df['Price'].apply(comma)
In [24]:
# Re-check dtypes and non-null counts after converting 'Price'.
df.info()
In [25]:
# Drop every row that has a missing value in any column.
# NOTE(review): this rebinds `df`, so the unfiltered frame is no longer
# reachable and the row index now has gaps.
df = df.dropna()
In [26]:
# Verify the remaining row count and that no nulls are left.
df.info()
In [27]:
# Second application of `comma` to 'Price'.
# NOTE(review): the same conversion already ran a few cells earlier; values
# that are no longer strings fall into `comma`'s except branch and come back
# unchanged, so this cell looks redundant - confirm before removing.
df['Price'] = df['Price'].apply(comma)
Price in Millions
In [28]:
# Derive 'Price_m': 'Price' divided by one million, rounded to one decimal.
price_in_millions = df['Price'] / 1000000
df['Price_m'] = round(price_in_millions, 1)
In [29]:
# Five rows with the highest 'Price_m'.
df.sort_values(by='Price_m', ascending=False).head()
Out[29]:
In [30]:
# Number of rows per distinct 'Type' value.
df['Type'].value_counts()
Out[30]:
In [31]:
# Subset of rows whose 'Type' is exactly 'Sale'.
is_sale = df['Type'] == 'Sale'
dfs = df[is_sale]
In [32]:
# Ten companies with the largest summed 'Price_m' among the 'Sale' rows.
(
    dfs.groupby('Company')['Price_m']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
Out[32]:
In [33]:
# Subset of rows whose 'Type' is exactly 'Purchase'.
is_purchase = df['Type'] == 'Purchase'
dfp = df[is_purchase]
In [34]:
# Ten companies with the largest summed 'Price_m' among the 'Purchase' rows.
(
    dfp.groupby('Company')['Price_m']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
Out[34]:
In [36]:
# Persist the cleaned table.
# index=False keeps the row index out of the file; without it the index is
# written as an unnamed first column, which comes back as a spurious
# 'Unnamed: 0' column the next time the file is loaded with pd.read_csv.
# NOTE(review): this overwrites the same file the notebook reads at the top,
# so the raw input is replaced by the cleaned output.
df.to_csv('scraped_and_cleand_six.csv', index=False)
In [221]:
# Check dtypes again before working on the 'Date' column.
df.info()
In [222]:
# Look at the first few raw 'Date' values to see their format.
df['Date'].head()
Out[222]:
In [223]:
from datetime import datetime
In [224]:
# Preview parsing 'Date' as day.month.four-digit-year without storing it.
pd.to_datetime(df['Date'], format='%d.%m.%Y')
Out[224]:
In [225]:
# Replace 'Date' with parsed datetimes (format: day.month.four-digit-year).
# Values that do not match the format raise, since no `errors=` is given.
df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y')
In [226]:
# Confirm that 'Date' now has a datetime dtype.
df.info()
In [230]:
# Index the frame by 'Date' so it can be resampled over time; drop=False keeps
# 'Date' available as a regular column as well.
df = df.set_index('Date', drop=False)
In [232]:
# Number of rows in each weekly bin; show the first five bins.
df.resample('W')['Date'].count().head()
Out[232]:
In [233]:
import matplotlib.pyplot as plt
import matplotlib
# Apply the 'ggplot' style sheet to all figures created from here on.
plt.style.use('ggplot')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
In [234]:
# Line plot of the number of rows per weekly bin.
df.resample('W')['Date'].count().plot()
Out[234]:
In [235]:
# Line plot of the summed 'Price' per weekly bin.
df.resample('W')['Price'].sum().plot()
Out[235]:
In [237]:
# Plot the summed 'Price' per weekly bin and write the figure to 'hello.pdf'.
weekly_price_total = df.resample('W')['Price'].sum()
weekly_price_total.plot()
# savefig must run in the same cell as the plot so the figure is still current.
plt.savefig('hello.pdf')
In [241]:
# Vertical bar chart of the summed 'Price' per monthly bin.
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of
# 'ME' - confirm against the installed version before changing.
df.resample('M')['Price'].sum().plot(kind='bar')
Out[241]:
In [249]:
# Same monthly 'Price' totals, drawn as a horizontal bar chart.
df.resample('M')['Price'].sum().plot(kind='barh')
Out[249]:
In [252]:
# Monthly 'Price' totals as a pie chart (one slice per monthly bin).
df.resample('M')['Price'].sum().plot(kind='pie')
Out[252]:
In [255]:
# Monthly pie chart again, with a smaller radius and a drop shadow.
df.resample('M')['Price'].sum().plot(kind='pie', radius=0.5, shadow=True)
Out[255]:
In [256]:
# Quarterly 'Price' totals as a pie chart (one slice per quarterly bin).
# NOTE(review): newer pandas releases deprecate the 'Q' alias in favour of
# 'QE' - confirm against the installed version before changing.
df.resample('Q')['Price'].sum().plot(kind='pie', radius=0.5, shadow=True)
Out[256]:
In [268]:
# Labelled pie chart of the summed 'Price' per quarterly bin, with percentages.
# The first slice is drawn in 'yellowgreen' and the remaining three in grey;
# every slice is pulled out from the centre by the same small offset.
# NOTE(review): labels/colors/explode assume exactly four quarterly bins - confirm
# that the data covers a single calendar year.
labels = ('Q1', 'Q2', 'Q3', 'Q4')
colors = ['yellowgreen'] + ['grey'] * 3
explode = (0.05,) * 4
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
quarterly_price_total = df.resample('Q')['Price'].sum()
quarterly_price_total.plot(
    kind='pie',
    radius=0.5,
    autopct='%1.1f%%',
    shadow=False,
    labels=labels,
    colors=colors,
    explode=explode,
)
Out[268]:
In [ ]: